Tensorliststack
将多个张量(Tensor)堆叠成一个更大的张量。此算子可以处理不同数据类型的张量,将它们按顺序拼接成一个连续的内存块。
\[\text{output\_data} = [\text{tensor}_1, \text{tensor}_2, \ldots, \text{tensor}_n]\]
其中每个张量的数据类型和元素数量可以不同。
- 输入:
tensor_num - 张量数量,tensor_num > 0
tensor_element_nums - 每个张量的元素数量(int* 类型)
tensor_data_type - 每个张量元素的数据类型(int* 类型),以单个元素所占的字节数表示;取值为0表示未知类型(kTypeUnknown)
tensor_data - 每个张量数据的起始地址(void** 类型)
output_data - 输出结果的数组起始位置(void* 类型)
unknown_type_offset - 未知类型数据在输出结果中的偏移量
core_mask - 核掩码(int),仅共享存储版本需要
- 输出:
output_data - 堆叠后的张量数据,按输入顺序连续存储
- 支持平台:
FT78NEMT7004
备注
该算子不区分具体的数据类型,数据类型信息通过tensor_data_type参数传递
当tensor_data_type[i]为0(kTypeUnknown)时,算子会将输出内存清零
当tensor_data_type[i]不为0时,算子会按字节复制数据
调用前需要确保output_data指向的内存空间足够大以容纳所有张量数据
TensorList中不同Tensor的数据类型可以不同,各Tensor的类型信息由tensor_data_type数组逐个给出,无需为每种类型单独调用算子
共享存储版本:
-
void fp_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
-
void i16_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
-
void c64_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
-
void hp_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
-
void i32_tensorlist_stack_s(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset, int core_mask);
C调用示例(共享存储版本):
1//FT78NE示例
2#include <stdio.h>
3#include <tensorliststack.h>
4#include <string.h>
5
6int main(int argc, char* argv[]) {
7 void* output_data = (void*)0x10010000;
8 void* check_data = (void*)0x10020000;
9 int tensor_num = 4; // 测试一个包含向量部分和尾部的数据长度
10
11 int tensor_element_nums[4] = {4096, 4096, 4096, 4096};
12 int tensor_data_type[4] = {4, 2, 4, 0};//4种数据类型
13
14 void* tensor_data[4] = {(void *)0x10030000, (void *)0x10040000, (void *)0x10050000, (void *)0x10060000};
15
16 srand(seed++);
17 // 初始化测试数据,包含各种情况
18 int i, j;
19
20 //tensor 1 int32
21 for(i = 0; i < tensor_element_nums[0]; i ++) {
22 ((int *)tensor_data[0])[i] = rand()%100;
23 }
24
25 //tensor 2 int16
26 for(i = 0; i < tensor_element_nums[1]; i ++) {
27 ((int16_t *)tensor_data[1])[i] = rand()%100;
28 }
29
30 //tensor 3 fp32
31 for (i = 0; i < tensor_element_nums[2]; i ++) {
32 ((float *)tensor_data[2])[i] = (float)rand()/RAND_MAX;
33 }
34
35 //tensor 4 fp16
36 for(i = 0; i < tensor_element_nums[3]; i ++) {
37 //类型为kTypeUnknown,不需要初始化
38 }
39 int core_mask = 0x0f;
40 fp_tensorlist_stack_s(tensor_num, tensor_element_nums, tensor_data_type, tensor_data, output_data, unknown_type_offset, core_mask);
41
42 return 0;
43}
私有存储版本:
-
void fp_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
-
void i16_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
-
void c64_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
-
void hp_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
-
void i32_tensorlist_stack_p(int tensor_num, int *tensor_element_nums, int *tensor_data_type, void **tensor_data, void *output_data, int unknown_type_offset);
C调用示例(私有存储版本):
1//FT78NE示例
2#include <stdio.h>
3#include <tensorliststack.h>
4#include <string.h>
5
6int main(int argc, char* argv[]) {
7 void* output_data = (void*)0x10010000;
8 void* check_data = (void*)0x10020000;
9 int tensor_num = 4; // 测试一个包含向量部分和尾部的数据长度
10
11 int tensor_element_nums[4] = {4096, 4096, 4096, 4096};
12 int tensor_data_type[4] = {4, 2, 4, 0};//4种数据类型
13
14 void* tensor_data[4] = {(void *)0x10030000, (void *)0x10040000, (void *)0x10050000, (void *)0x10060000};
15
16 srand(seed++);
17 // 初始化测试数据,包含各种情况
18 int i, j;
19
20 //tensor 1 int32
21 for(i = 0; i < tensor_element_nums[0]; i ++) {
22 ((int *)tensor_data[0])[i] = rand()%100;
23 }
24
25 //tensor 2 int16
26 for(i = 0; i < tensor_element_nums[1]; i ++) {
27 ((int16_t *)tensor_data[1])[i] = rand()%100;
28 }
29
30 //tensor 3 fp32
31 for (i = 0; i < tensor_element_nums[2]; i ++) {
32 ((float *)tensor_data[2])[i] = (float)rand()/RAND_MAX;
33 }
34
35 //tensor 4 fp16
36 for(i = 0; i < tensor_element_nums[3]; i ++) {
37 //类型为kTypeUnknown,不需要初始化
38 }
39
40 fp_tensorlist_stack_p(tensor_num, tensor_element_nums, tensor_data_type, tensor_data, output_data, unknown_type_offset);
41
42 return 0;
43}